CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     chaotic.f
C
C     This program uses a four-point stencil to smooth a 1,000 by 1,000
C     array.  The smoothing is done in parallel using domain decomposition
C     and chaotic relaxation.  That is, all tasks are given a fixed portion
C     of the problem domain -- part of the array -- over which they do the
C     smoothing.  At the end of each iteration they exchange borders.  To
C     simplify the code and get better efficiency, each task maintains a
C     copy of each neighbor's border in "ghost cells".  Ghost cells are an
C     extra row or column added to each side of the task's stencil array,
C     used to hold neighboring values as input to the computation.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC

      PROGRAM MAIN
      IMPLICIT NONE
      INTEGER prob_size
      REAL close_enough
      PARAMETER (prob_size=1000, close_enough=0.1)
      INCLUDE 'stencil.h'
      REAL stencil
      COMMON stencil(0:prob_size-1,0:prob_size-1)

      CALL init_comm(prob_size)
      IF (my_task.EQ.0) PRINT *, "initializing the array."
      CALL init_stencil(stencil, prob_size, task_rows, task_cols)
      IF (my_task.EQ.0) PRINT *, "computing the stencil."
      CALL compute_stencil(stencil, task_rows, task_cols, close_enough)
      IF (my_task.EQ.0) PRINT *, "ending communication."
      CALL term_comm()
      IF (my_task.EQ.0) PRINT *, "ending the program."

      END


CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     init_comm
C
C     This routine brings up MPI and sets up all of the values needed to
C     successfully and efficiently do the smoothing operation in parallel.
C     Most variables used here are described in the .h file for this
C     program.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      SUBROUTINE init_comm(prob_size)
      IMPLICIT NONE
      INTEGER prob_size
      INCLUDE 'mpif.h'
      INCLUDE 'stencil.h'

      CALL MPI_Init(ierror)
      CALL MPI_Comm_size(MPI_Comm_world, num_tasks, ierror)
      CALL MPI_Comm_rank(MPI_Comm_world, my_task, ierror)
      dims(1) = sqrt(real(num_tasks))
      dims(2) = num_tasks / dims(1)
      wrap(1) = .false.
      wrap(2) = .false.
      CALL MPI_Cart_create(MPI_Comm_world, grid_rank, dims, wrap,
     1   .true., my_comm, ierror)
      CALL MPI_Cart_shift(my_comm, 0, 1, n_task, s_task, ierror)
      CALL MPI_Cart_shift(my_comm, 1, 1, w_task, e_task, ierror)
      CALL MPI_Cart_get(my_comm, 2, dims, wrap, coords, ierror)

      rowwidth  = 1 + (prob_size - 2 - 1) / dims(1)
      colwidth  = 1 + (prob_size - 2 - 1) / dims(2)
      row_st    = 1 + coords(1) * rowwidth
      row_end   = MIN(prob_size - 2, row_st + rowwidth - 1)
      col_st    = 1 + coords(2) * colwidth
      col_end   = MIN(prob_size - 2, col_st + colwidth - 1)
      task_rows = 2 + 1 + row_end - row_st
      task_cols = 2 + 1 + col_end - col_st

      CALL MPI_Type_vector(task_cols-2, 1, task_rows, MPI_Real,
     1   row_type, ierror)
      CALL MPI_Type_vector(task_rows-2, 1,         1, MPI_Real,
     1   col_type, ierror)
      CALL MPI_Type_commit(row_type, ierror)
      CALL MPI_Type_commit(col_type, ierror)

      END


CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     init_stencil
C
C     This routine reads in the initial values for the array over which
C     smoothing will occur.  Since the program is executed in parallel,
C     and this task operates on only a portion of the whole array, only
C     those records which are needed are read in.  But since Fortran
C     does not offer the ability to read in partial records, the whole
C     record is read into a buffer, then the useful part is copied into
C     the array.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      SUBROUTINE init_stencil(stencil, prob_size, m, n)
      IMPLICIT NONE
      INCLUDE 'stencil.h'
      INTEGER prob_size, m, n, i, j
      REAL stencil(0:m-1, 0:n-1)
      REAL buffer(0:prob_size-1)

      OPEN(UNIT=1, FILE="stencil.dat", STATUS="OLD", ACTION="READ",
     1   FORM="UNFORMATTED", ACCESS="DIRECT", RECL=prob_size*4)
      DO j=col_st-1, col_end+1
         READ (UNIT=1, REC=j+1) (buffer(i), i=0, prob_size-1)
         DO i=0, m-1
            stencil(i, j-(col_st-1)) = buffer(i + row_st - 1)
         END DO
      END DO
      CLOSE(UNIT=1)

      END


CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     compute_stencil
C
C     This routine smooths the values of the array.  The routine first
C     exchanges the borders it shares with its neighbors, then it sweeps
C     over consecutive rows of its portion of the array until it reaches
C     the end.  Next it shares its maximum local error with all other
C     tasks in the program.  If the maximum local error over all tasks,
C     called the global error (global_err) is greater than the tolerance,
C     the program cycles through another iteration.
C
C     Note that all tasks are busy at the same time and have roughly the
C     same amount of work so there is minimal load imbalance.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      SUBROUTINE compute_stencil(stencil, m, n, close_enough)
      IMPLICIT NONE
      INCLUDE 'mpif.h'
      INCLUDE 'stencil.h'
      INTEGER m, n
      REAL stencil(0:m-1, 0:n-1)
      REAL close_enough
      REAL local_err, global_error, old_value
      INTEGER i, j, iter_count

      iter_count = 0
 100  CONTINUE
         local_err = 0.0
	 iter_count = iter_count + 1
 	 CALL exchange(stencil, m, n)

	 DO j=1, n-2
	    DO i=1, m-2
	       old_value = stencil(i,j)

	       stencil(i,j) = ( stencil(i-1, j ) +
     1                          stencil(i+1, j ) +
     2                          stencil( i ,j-1) +
     3                          stencil( i ,j+1) ) / 4

               local_err = MAX(local_err,ABS(old_value-stencil(i,j)))
	    END DO
	 END DO
	 CALL MPI_Allreduce(local_err, global_error, 1, MPI_Real,
     1      MPI_Max, MPI_Comm_world, ierror)

         IF(MOD(iter_count,100).EQ.0)PRINT *, iter_count, global_error
      IF (close_enough.LT.global_error) GOTO 100
      PRINT *, "convergence reached after", iter_count, "iterations."

      END


CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     exchange
C
C     This routine exchanges all four borders, north, south, east, and
C     west, placing the incoming values in the appropriate "ghost cells".
C     It first initiates all receives as asynchronous receives, then all
C     sends as asynchronous sends, then it waits for all sends and receives
C     to complete.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      SUBROUTINE exchange(stencil, m, n)
      IMPLICIT NONE
      INCLUDE 'mpif.h'
      INCLUDE 'stencil.h'
      INTEGER m, n
      REAL stencil(0:m-1, 0:n-1)
      INTEGER request(8), my_status(MPI_Status_size), i

      CALL MPI_Irecv(stencil(  0,  1), 1, row_type, n_task, 0,
     1   my_comm, request(1), ierror)
      CALL MPI_Irecv(stencil(m-1,  1), 1, row_type, s_task, 0,
     1   my_comm, request(2), ierror)
      CALL MPI_Irecv(stencil(  1,  0), 1, col_type, w_task, 0,
     1   my_comm, request(3), ierror)
      CALL MPI_Irecv(stencil(  1,n-1), 1, col_type, e_task, 0,
     1   my_comm, request(4), ierror)

      CALL MPI_Isend(stencil(  1,  1), 1, row_type, n_task, 0,
     1   my_comm, request(5), ierror)
      CALL MPI_Isend(stencil(m-2,  1), 1, row_type, s_task, 0,
     1   my_comm, request(6), ierror)
      CALL MPI_Isend(stencil(  1,  1), 1, col_type, w_task, 0,
     1   my_comm, request(7), ierror)
      CALL MPI_Isend(stencil(  1,n-2), 1, col_type, e_task, 0,
     1   my_comm, request(8), ierror)

      DO i=1, 8
         CALL MPI_Wait(request(i), my_status, ierror)
      END DO

      END


CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C     term_comm
C
C     This routine frees the MPI message defined types and terminates MPI.
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
      SUBROUTINE term_comm()
      IMPLICIT NONE
      INCLUDE 'stencil.h'

      CALL MPI_Type_free(col_type, ierror)
      CALL MPI_Type_free(row_type, ierror)
      CALL MPI_Finalize(ierror)

      END
